import wrds
import pandas as pd
import numpy as np
import plotly.express as px
conn=wrds.Connection()
WRDS recommends setting up a .pgpass file.
Created .pgpass file successfully. Loading library list... Done
df= conn.raw_sql(f"""select conm, gvkey, fyear,
ni,
at,
lt
from comp.funda a
where a.fyear>= 2019 and
a.fyear<=2020 and
a.indfmt= 'INDL' and
a.datafmt = 'STD' and
a.popsrc= 'D' and
a.consol = 'C' and
(a.sich < 6000 or a.sich > 6999)
""")
df= df.sort_values(['gvkey','fyear'])
df= df.reset_index(drop=True)
df['fyear']= df['fyear'].astype(int)
df['log_at']= np.log10(1+df['at'])
df['lag_at']= df['at'].shift(1)
df['ROA']= df['ni'] / df['lag_at']
df['leverage']= df['lt'] / df['at']
print(df.shape)
df.head()
(10461, 10)
| conm | gvkey | fyear | ni | at | lt | log_at | lag_at | ROA | leverage | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | AAR CORP | 001004 | 2019 | 4.400 | 2079.000 | 1176.40 | 3.318063 | NaN | NaN | 0.565849 |
| 1 | AAR CORP | 001004 | 2020 | 35.800 | 1539.700 | 565.30 | 3.187718 | 2079.0 | 0.017220 | 0.367149 |
| 2 | AMERICAN AIRLINES GROUP INC | 001045 | 2019 | 1686.000 | 59995.000 | 60113.00 | 4.778122 | 1539.7 | 1.095019 | 1.001967 |
| 3 | AMERICAN AIRLINES GROUP INC | 001045 | 2020 | -8885.000 | 62008.000 | 68875.00 | 4.792455 | 59995.0 | -0.148096 | 1.110744 |
| 4 | CECO ENVIRONMENTAL CORP | 001050 | 2019 | 17.707 | 408.637 | 215.62 | 2.612399 | 62008.0 | 0.000286 | 0.527657 |
df=df[df.fyear==2020]
df= df.dropna(how='any')
print(df.shape)
df.head()
(5135, 10)
| conm | gvkey | fyear | ni | at | lt | log_at | lag_at | ROA | leverage | |
|---|---|---|---|---|---|---|---|---|---|---|
| 1 | AAR CORP | 001004 | 2020 | 35.800 | 1539.700 | 565.300 | 3.187718 | 2079.000 | 0.017220 | 0.367149 |
| 3 | AMERICAN AIRLINES GROUP INC | 001045 | 2020 | -8885.000 | 62008.000 | 68875.000 | 4.792455 | 59995.000 | -0.148096 | 1.110744 |
| 5 | CECO ENVIRONMENTAL CORP | 001050 | 2020 | 8.211 | 419.314 | 215.703 | 2.623574 | 408.637 | 0.020094 | 0.514419 |
| 7 | PINNACLE WEST CAPITAL CORP | 001075 | 2020 | 550.559 | 20020.421 | 14267.628 | 4.301495 | 18479.247 | 0.029793 | 0.712654 |
| 10 | ABBOTT LABORATORIES | 001078 | 2020 | 4495.000 | 72548.000 | 39545.000 | 4.860631 | 67887.000 | 0.066213 | 0.545087 |
df= df.sample(n=200)
fig = px.scatter(df[(df.ROA>-1)&(df.ROA<1) & (df.leverage<3)], x="ROA", y="leverage",
hover_name="conm", hover_data=["lag_at", "at", "lt"], size='log_at', width= 800, height= 500,
title= "Scatter plot of ROA-leverage: Using Compustat sample in 2020 excluding outliers")
fig.show()
conn.close()